  set.seed(12345)
  rm(list=ls())
  library(knitr)
  opts_chunk$set(cache=FALSE)

## Quick fix for stargazer <= 5.2.3 is.na() issue with long model names in R >= 4.2
## Otherwise script won't run
  # # Unload stargazer if loaded
  # detach("package:stargazer",unload=T)
  # # Delete it
  # remove.packages("stargazer")
  # # Download the source
  # download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
  # # Unpack
  # untar("stargazer_5.2.3.tar.gz")
  # # Read the sourcefile with .inside.bracket fun
  # stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
  # # Move the length check 5 lines up so it precedes is.na(.)
  # stargazer_src[1990] <- stargazer_src[1995]
  # stargazer_src[1995] <- ""
  # # Save back
  # writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
  # # Compile and install the patched package
  # install.packages("stargazer", repos = NULL, type="source")
  
  
# DATA 
    # European Social Survey: http://www.europeansocialsurvey.org/
  
  # cat(paste("#", capture.output(sessionInfo()), "\n", collapse =""))

# Load packages ####
  # install.packages("pacman")
  pacman::p_load(plyr,
  dplyr,
  stringr,
  tidyr,
  haven,
  stargazer,
  stringr,
  corrr,
  bindrcpp,
  gridExtra,
  readr,
  kableExtra,
  tidyverse,
  gt,
  modelsummary,
  patchwork)

# RUN: "CREATE DATA.R"





# These functions are taken from package SMDTools (not available anymore)
# https://github.com/cran/SDMTools/blob/master/R/wt.mean.R
# Similar functions from Hmisc package returned an error

wt.mean <- function(x,wt) {
	s = which(is.finite(x*wt)); wt = wt[s]; x = x[s] #remove NA info
	return( sum(wt * x)/sum(wt) ) #return the mean
}


wt.var <- function(x,wt) {
	s = which(is.finite(x + wt)); wt = wt[s]; x = x[s] #remove NA info
	xbar = wt.mean(x,wt) #get the weighted mean
	return( sum(wt *(x-xbar)^2)*(sum(wt)/(sum(wt)^2-sum(wt^2))) ) #return the variance
} 


wt.sd <- function(x,wt) { 
	return( sqrt(wt.var(x,wt)) ) #return the standard deviation
} 




  

# Load data ####
  data <- read_csv("input_data.csv")

# Female ####
  data <- data %>% mutate(female = recode(gender, "2" = "1", "1" = "0")) %>%
        mutate(female = as.numeric(female))
  table(data$gender, data$female)
  

  
# Recode trust values higher 10 to NA
  data <- data %>% mutate_at(vars(trust_parliament:trust_un), # recode
            function(x) ifelse(x > 10, NA, x))
  table(data$trust_euparliament)

# Age #### 
  data <- data %>% 
          mutate(age = ifelse(age > 123, NA, age))  %>% 
          mutate(age = ifelse(age < 18, NA, age)) %>%
          mutate(age_cat = as.numeric(as.character(cut(age, breaks = c(-1, 35, 64, 100.5), labels = c("0", "NA", "1"))))) %>%
          mutate(age_cat_3 = cut(age, breaks = c(-1, 35, 64, 100.5), labels = c("young", "middle", "old"))) %>%
    mutate(age_cat_young_middle = recode(age_cat_3, "young" = 0, "middle" = 1)) %>%
    mutate(age_cat_middle_old = recode(age_cat_3, "middle" = 0, "old" = 1))
  #table(data$age, data$age_cat)
  #table(data$age_cat_3, data$age_cat_young_middle)
  # table(data$age_cat_3, data$age_cat_middle_old)
  
# Household income feelings ####
# Recode: Respondent’s feelings about the household’s current income 
  # (perceived higher income [i.e., the respondent reports coping on his or her present income or living comfortably on it] used as reference category versus perceived low income [i.e., the respondent reports finding it difficult or very difficult to cope on his or her present income]);
  
  # 1 Living comfortably on present income	
  # 2	Coping on present income	
  # 3	Difficult on present income	
  # 4	Very difficult on present income	
  data <- data %>% 
          mutate(hincome_feeling = ifelse(hincome_feeling > 4, NA, hincome_feeling)) %>% 
          mutate(hincome_cat = hincome_feeling) %>%
          mutate(hincome_cat = 
                   as.numeric(as.character(cut(hincome_cat, breaks = c(-1, 2.2, 2.9, 4.1), labels = c("0", NA, "1"))))) %>%
          mutate(hincome_cat_4 = 
                   cut(hincome_feeling, breaks = c(-1, 1.1, 2.1, 3.1, 4.1), labels = c("Comfortably (1)", "Coping (2)", "Difficult (3)", "Very difficult (4)")
                                               ))
                   
          
  table(data$hincome_feeling, data$hincome_cat)

  
# Employment status ####
    # the respondent’s employment status (in paid work or in education as reference 
    # category versus unemployed, retired or other non-employed);
  data <- data %>% 
          mutate(mainact = ifelse(mainact > 9, NA, mainact))%>% 
          mutate(paid_work_cat = mainact)
  data$paid_work_cat <- as.numeric(dplyr::recode(data$paid_work_cat, 
                           1 == 1,
                           2 == 1,
                           3 == 0,
                           4 == 0,
                           5 == NA,
                           6 == NA,
                           7 == NA,
                           8 == NA,
                           9 == 0)) 
  
  data$mainact <- recode(as.character(data$mainact),
      "1"	= "Paid work (1)", 
      "2"	= "Education (2)",
      "3"	= "Looking for job (3)",
      "4"	= "Unempl., not looking (4)",
      "5"	= "Permanently sick (5)",
      "6"	= "Retired (6)",
      "7"	= "Commun./milit. service (7)",
      "8"	= "Housework (8)",
      "9"	= "Other (9)")
  

  #table(data$mainact, data$paid_work_cat)
      # 1	Paid work	(1)
      # 2	Education	(1)
      # 3	Unemployed, looking for job	(0)
      # 4	Unemployed, not looking for job	(0)
      # 5	Permanently sick or disabled (NA)
      # 6	Retired	 (0)
      # 7	Community or military service (NA)
      # 8	Housework, looking after children, others	(NA)
      # 9	Other	(0)
      # 66	Not applicable
      # 77	Refusal
      # 88	Don't know
      # 99	No answer
  
  # Education ####
  # Education years
    # 77	Refusal
    # 88	Don't know
    # 99	No answer
  # data <- data %>% 
  #         mutate(education_years = ifelse(eduyrs > 56, NA, eduyrs)) %>%
  #         mutate(education_cat = as.numeric(as.character(cut(education_years, breaks = c(-1, 8, 13.9, 57), labels = c("0", NA, "1")))))
  # table(data$education_years)
  # table(data$education_cat)
  #   table(data$education_years, data$education_cat)
  #   
  # data <- data %>% 
  #         mutate(education_cat_low_middle = as.numeric(as.character(cut(education_years, breaks = c(-1, 8, 13.9, 57), labels = c("0", "1", NA)))))
  #       table(data$education_years, data$education_cat_low_middle)
  # 
  # data <- data %>% 
  #         mutate(education_cat_middle_high = as.numeric(as.character(cut(education_years, breaks = c(-1, 8, 13.9, 57), labels = c(NA, "0", "1")))))
  #     table(data$education_years, data$education_cat_middle_high)


    # education - eisced
    #  (lower education [i.e., International Standard Classification of Education (ISCED) 0, 1 and 2] contrasted with higher education [i.e., ISCED 3-6] used as reference)
  
  # http://nesstar.ess.nsd.uib.no/webview/index.jsp?v=2&submode=variable&study=http%3A%2F%2F129.177.90.83%3A80%2Fobj%2FfStudy%2FESS5e03.4&gs=undefined&variable=http%3A%2F%2F129.177.90.83%3A80%2Fobj%2FfVariable%2FESS5e03.4_V363&mode=documentation&top=yes
  
     data$eisced[data$eisced==0|data$eisced==55|data$eisced==77|data$eisced==88|data$eisced==99] <- NA # 0 = Not possible to harmonise into ES-ISCED
     data$eisced <- data$eisced - 1
     
     
     data$eisced_labelled <- recode(data$eisced,
                                    "0" = "ES-ISCED I",
                                    "1" = "ES-ISCED II",
                                    "2" = "ES-ISCED IIIb",
                                    "3" = "ES-ISCED IIIa",
                                    "4" = "ES-ISCED IV",
                                    "5" = "ES-ISCED V1",
                                    "6" = "ES-ISCED V2")
     
     
     data <- data %>% 
          mutate(education_eisced = as.numeric(as.character(cut(eisced, breaks = c(-1, 3.5, 10), labels = c("0", "1")))))
     # table(data$education_eisced, data$eisced)
     
     # 1	ES-ISCED I , less than lower secondary = 0
     # 2	ES-ISCED II, lower secondary = 0
     # 3	ES-ISCED IIIb, lower tier upper secondary = 0
     # 4	ES-ISCED IIIa, upper tier upper secondary = NA
     # 5	ES-ISCED IV, advanced vocational, sub-degree = 1
     # 6	ES-ISCED V1, lower tertiary education, BA level = 1
     # 7	ES-ISCED V2, higher tertiary education, >= MA level = 1
 
# Try this variable for between group polarization
     

  
  
      
      

    
# leftright 
  data <- data %>% 
          mutate(leftright = ifelse(leftright > 10, NA, leftright))  %>% 
          mutate(leftright = ifelse(leftright < 0, NA, leftright)) %>%
          mutate(ideology_cat = as.numeric(as.character(cut(leftright, breaks = c(-1, 4.5, 5.5, 10.5), labels = c("0", "NA", "1"))))) 
  #table(data$leftright, data$ideology_cat)
    


      
  data <- data %>% select(cntry, trust_euparliament, trust_parliament, age, age_cat, hincome_cat, paid_work_cat,
                          education_eisced, ideology_cat, year, dweight, pweight, pspwght, hincome_feeling, mainact, hincome_cat_4, age_cat_3, age_cat_young_middle, age_cat_middle_old, eisced_labelled, 
  												leftright
                          )

  


    # SCRAPE COUNTRY CODES FROM WIKIPEDIA
      # library(rvest)
      # countrycodes <- html("http://en.wikipedia.org/wiki/ISO_3166-1", encoding = "UTF-8") %>%
      #   html_nodes(xpath='//*[@id="mw-content-text"]/div/table[2]') %>%
      #        html_table()
      # countrycodes <- countrycodes[[1]]
      # names(countrycodes)[1:2] <- c("country", "cntry")
      # write.table(countrycodes, "data_countrycodes.csv", sep=",") 
      countrycodes <- read.csv("input_data_countrycodes.csv", sep=",")
      data <- left_join(data, countrycodes, by="cntry")  
      data$country <- as.character(data$country)
      data[data$cntry=="XK","country"] <- "Kosovo"
      data$country[data$country=="United Kingdom of Great Britain and Northern Ireland"] <- "United Kingdom"
      names(data) <- str_replace_all(names(data), "\\.", "_")
      # data <- data %>% rename(links = Link_to_ISO_3166_2_subdivision_codes)
      # data <- data %>% select(year, cntry, country, contains("trust_eu"))



# Delete certain countries from the dataset
      data <- data %>% filter(country!="Albania",
                                    country!="Iceland",
                                    country!="Israel",
                                    country!="Kosovo",
                                    country!="Norway",
                                    country!="Russian Federation",
                                    country!="Turkey",
                                    country!="Ukraine",
                                    country!="Croatia",
                                    country!="Switzerland",
                                    country!="Luxembourg",
                                    country!="Serbia",
                                    country!="Montenegro",
                              country!="Macedonia (the former Yugoslav Republic of)"
                              )





library(dplyr)

# Aggregate data ####

# Calculate mean and variance on country level for both national and eu parliaments
  data.agg <- data %>% #select(-dweight, -pweight) %>%
              group_by(year, country) %>% 
              group_by(N_rows = n(), .add = TRUE) %>%
              dplyr::summarize(trust_euparliament_mean = mean(trust_euparliament, na.rm=TRUE),
                        trust_euparliament_var = var(trust_euparliament, na.rm=TRUE),
                        trust_euparliament_missings = sum(is.na(trust_euparliament)),
                        
                        trust_parliament_mean = mean(trust_parliament, na.rm=TRUE),
                        trust_parliament_var = var(trust_parliament, na.rm=TRUE),
                        trust_parliament_missings = sum(is.na(trust_parliament)), 
                        
                        trust_euparliament_mean_w_sdm = wt.mean(trust_euparliament, wt = pspwght),
                        trust_euparliament_var_w_sdm = wt.var(trust_euparliament, wt = pspwght) 

                        ) %>%
    mutate(n_trust_parliament = N_rows - trust_parliament_missings) %>% 
    mutate(n_trust_euparliament = N_rows - trust_euparliament_missings)
                        
                   

              #%>% # Aggregate
              #mutate_at(c("trust_parliament_var", "trust_euparliament_var"), .funs = funs("2" = . * 2)) # ?
  
# WEITER mit
# WEIGHTED MEAN AND VARIANCE results into the appendix
# ADD CONFIDENCE INTERVALS both for weighted mean and var and into the graphs
  # https://stat.ethz.ch/pipermail/r-help/2008-July/168762.html
  # https://rdrr.io/github/hadley/bigvis/man/weighted.var.html
  # https://github.com/hadley/bigvis/

# Add periphery dummy (see Dotti)
  data.agg$periphery <- 0
  data.agg$periphery[data.agg$country %in% c("Spain", "Italy", "Portugal", "Ireland", "Cyprus", "Greece")] <- 1    
  table(data.agg$periphery)


# Add Pre-crisis dummy
  data.agg$time_2006_dummy[data.agg$year<=2006] <- 0
  data.agg$time_2006_dummy[data.agg$year>2006] <- 1
  table(data.agg$time_2006_dummy)

  
  # Considering year 2008 as pre-crisis: 2002-2008=0, 2010-2016=1
    data.agg$time_2008_dummy[data.agg$year<=2008] <- 0
    data.agg$time_2008_dummy[data.agg$year>=2010] <- 1
    table(data.agg$time_2008_dummy) 
    
  # Categorical
    data.agg <- data.agg %>% 
              mutate(time_factor = case_when(
                      year<=2006 ~ 0,
                      year>=2008 & year<=2012 ~ 1,
                      year>=2014 ~ 2)) %>% 
                  mutate(time_factor = 
                  factor(time_factor, 
                         levels=c("0","1","2"), 
                         labels = c("2002-2006","2008-2012","2014-2016")))
    
    
  table(data.agg$time_factor, data.agg$year)

# Calculate and add share of females on the country level
  # Calculate mean and variance on country level for both national and eu parliaments
  # data.agg.female <- data %>% #select(-dweight, -pweight) %>%
  #             group_by(year, country) %>% 
  #             group_by(N_rows = n(), add = TRUE) %>%
  #             summarise_at(vars(female),
  #                          funs(mean), na.rm = TRUE)
  # 
  # data.agg <- left_join(data.agg, data.agg.female, by = c("year", "country"))
  
    
    
# Save longformat data
  #write.table(data.agg, "ESS_aggregated_long.csv", row.names=FALSE, sep=",")
  #write_dta(data.agg, "ESS_aggregated_long.dta")
      
      
# Create wide format dataset (used for Table 1 data across time)
  esswide <- data.agg %>% gather(key = variable,
                                 value = values,
                                 -year,
                                 -country) %>% 
              unite(variable_time, variable, year, sep = "_") %>%
              spread(key = variable_time, value = values)
      
# Save wideformat data
  #write.table(esswide, "ESS_aggregated_wide.csv", row.names=FALSE, sep=",")
  # write_dta(esswide, "ESS_aggregated_wide.dta") 
      




# Worldbank: Unemployment, total (% of total labor force) (modeled ILO estimate)
  # https://data.worldbank.org/indicator/SL.UEM.TOTL.ZS
  data_unemployment <- read.csv("input_API_SL.UEM.TOTL.ZS_DS2_en_csv_v2_5182238.csv",
  															skip = 4)
  data_unemployment <- data_unemployment[,-c(2:4)]
  names(data_unemployment)[1] <- "country"
  names(data_unemployment) <- gsub("^X", "", names(data_unemployment))
  data_unemployment_long <- data_unemployment %>% 
  	select(-64) %>%
  	gather(key = "year", value = "unemployment", -country) %>% 
  	mutate(country = as.character(country)) %>% 
  	mutate(year = as.numeric(year))
  data_unemployment_long$country <- recode(data_unemployment_long$country, 
                                           "Czech Republic" =  "Czechia",
                                           "Slovak Republic" = "Slovakia")

# sort(intersect(unique(data_unemployment_long$country), unique(data.agg$country)))

  
# Worldbank: GDP growth (annual %)
  # https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG
  data_gdp_growth <- read.csv("input_API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_5181578.csv",
  														skip = 4)
  data_gdp_growth <- data_gdp_growth[,-c(2:4)]
  names(data_gdp_growth)[1] <- "country"
  names(data_gdp_growth) <- gsub("^X", "", names(data_gdp_growth))
  data_gdp_growth_long <- data_gdp_growth %>% 
  	select(-64) %>%
  	gather(key = "year", value = "gdp_growth", -country) %>% 
  	mutate(country = as.character(country)) %>% 
  	mutate(year = as.numeric(year))
  data_gdp_growth_long$country <- recode(data_gdp_growth_long$country, 
                                           "Czech Republic" =  "Czechia",
                                           "Slovak Republic" = "Slovakia")
  
  
# Merge unemployment and gdp growth with data.agg
  data.agg$year <- as.numeric(data.agg$year) 
  data.agg <- left_join(data.agg, data_unemployment_long, by = c("year", "country"))  
  data.agg <- left_join(data.agg, data_gdp_growth_long, by = c("year", "country")) 
  

# Table A7 ####
# Generate table that indicates for which years we have data
  x <- esswide %>% select(country, contains("trust_euparliament_mean_")) %>% select(-contains("w_"))
  x <- data.frame(x)
  x <- x %>% mutate_at(vars(contains('trust')), funs(replace(., !is.na(.), "o")))
  x <- x %>% mutate_at(vars(contains('trust')), funs(replace(., is.na(.), "")))
  names(x) <- gsub("trust_euparliament_mean_", "", names(x))
  names(x) <- gsub("country", "Country", names(x))
  row.names(x) <- NULL
  x <- bind_cols(Nr. = 1:nrow(x), x)


stargazer::stargazer(x, type = "latex", 
                     summary = FALSE, 
                     font.size = "scriptsize", 
                     title = "Data across countries, across time",
                     header = FALSE, 
                     label = "tab:data",
                     column.sep.width = "0.2pt",
                     rownames = FALSE,
                     notes = c("Notes: Years and countries for which data on trust in the EP were available."), 
                     table.placement = "ht",
                     out = "Table A7.html")


# Text objects

trust_mean_min <- paste(data.agg %>% ungroup() %>% 
													dplyr::select(trust_euparliament_mean, country, year)  %>% 
													arrange(trust_euparliament_mean) %>% mutate_at(1, round, 2) %>% 
													slice(1) %>% 
													as.character(), collapse = ", ")
# Figure 1 ####
# Generate a graph that visualized mean and polarization over time

# Renaming for ggplot2 longformat: Do this for 4 variables
  data.agg1 <- data.agg %>% select(year, country, trust_euparliament_mean, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_euparliament_mean") %>% 
                            dplyr::rename(value = trust_euparliament_mean) %>% 
                            mutate(value = as.numeric(value))
  
  data.agg2 <- data.agg %>% select(year, country, trust_euparliament_var, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_euparliament_var") %>% 
                            dplyr::rename(value = trust_euparliament_var) %>% 
                            mutate(value = as.numeric(value))

  data.agg3 <- data.agg %>% select(year, country, trust_parliament_mean, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_parliament_mean") %>% 
                            dplyr::rename(value = trust_parliament_mean) %>% 
                            mutate(value = as.numeric(value))
  
  data.agg4 <- data.agg %>% select(year, country, trust_parliament_var, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_parliament_var") %>% 
                            dplyr::rename(value = trust_parliament_var) %>% 
                            mutate(value = as.numeric(value))

# Row bind datasets
  data.agg.long <- bind_rows(data.agg1, 
                             data.agg2,
                             data.agg3,
                             data.agg4) %>%
    mutate(institution = gsub("trust_|_mean|_var", "", variable)) %>%
    mutate(variable = gsub("euparliament_|parliament_", "", variable)) %>%
    filter(institution!="parliament")


# Filter out countries (with a single observation over time)
  data.agg.long <- data.agg.long %>% filter(variable!="")

  # data.agg.long <- data.agg.long %>% filter(!is.na(value)) # Filter out potential missings
  data.agg.long <- as.data.frame(data.agg.long) # Convert to dataframe class
  data.agg.long$year <- as.numeric(data.agg.long$year) # Convert year variable to numeric
  data.agg.long$variable <- as.factor(data.agg.long$variable) 

# Generate plot
  library(ggplot2)

# Function to change labels for x axis
  gen_newlables <- function(x) {
    x <- gsub("^20", "",x) # replace 20 in year values
    x <- paste(x, "'", sep="")
  }
  
  ggplot(data.agg.long, aes(x=year, y=value, shape = variable)) + #, color = institution
         geom_point(aes(shape=variable), alpha = 0.7) +
         geom_line(alpha = 0.7) +
         facet_wrap(~country) + 
         scale_x_continuous(breaks = seq(2002,2020,6),
                            labels = gen_newlables,
                            name ="Year") + 
         scale_y_continuous(name ="Mean and variance of trust in European Parliament") + 
         # scale_colour_manual(name  ="Institution",
         #                       labels=c("EU Parliament", "Nat. Parliament"),
         #                       values=c("black", "lightgray")) + # "#998ec3"
         scale_shape_manual(name  ="Measure",
                              values = c(16,17),
                               labels=c("Mean", "Variance (polarization)")) + 
         theme_light() +
         theme(axis.text.x = element_text(size=8, angle=45, hjust=1,vjust=1), # 
               legend.position="top")
  


ggsave("Figure 1.png", width = 7, height = 7, dpi = 600)

# https://stackoverflow.com/questions/26917689/how-to-use-facets-with-a-dual-y-axis-ggplot

# Calculate correlations
	data_correlations <- data.agg.long  %>% 
		pivot_wider(names_from = variable, values_from = value) %>%
		group_by(year) %>%
		summarize(correlation = cor(trust_mean, trust_var))
	# Paste correlations below into paper
	paste(paste(round(data_correlations$correlation,2)," (", 
				data_correlations$year, ")", sep=""), collapse = ", ")



# Table 1 ####

library(stargazer)
data.agg$year <- as.numeric(data.agg$year)
data.agg$year_rescaled <- data.agg$year-2002
# Rescale year
fit1 <- lm(trust_euparliament_mean ~ year_rescaled + as.factor(country), data = data.agg)
fit2 <- lm(trust_euparliament_mean ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_var ~ year_rescaled + as.factor(country), data = data.agg)
fit4 <- lm(trust_euparliament_var ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)



stargazer(fit1, fit2, fit3, fit4,
          #fit5, fit6, fit7, fit8,
          type="latex", 
          title = "Over-time changes in trust in the EP",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Trust in the EP",
          dep.var.labels = c("Mean", "Variance", "Nat. parl.: Mean", "Nat. parl.: Var."),
          #dep.var.labels = c("EP: Mean", "EP: Variance", "Nat. parl.: Mean", "Nat. parl.: Var."),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:trust-trends", 
          covariate.labels = c("Year (0-18)", "Periphery (0,1)", "Year x Periphery"),
          #table.placement="H", 
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M1", "M2", "M3", "M4"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c("Data: European Social Survey, 2002-2020.", "The models include country fixed-effects.", "Time variable rescaled so that year '2002' corresponds to value '0'."),
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table 1.html"
          )





# Create Data_OV ####
# Below we calculate the overlap coefficients for trust_euparliament for each of our contrast variables that divide the sample into two groups each (always names *_cat)

  library(overlapping) # or try overlap package

# Generate overlap data for the variables
  # age_cat, hincome_cat, paid_work_cat, education_cat

variables <- c("age_cat", 
               "hincome_cat", 
               "paid_work_cat",
               #"education_cat",
               #"education_cat_low_middle",
               #"education_cat_middle_high",
               "education_eisced",
               "age_cat_young_middle",
               "age_cat_middle_old",
               "ideology_cat")

for(var_i in variables){

  list <- split(data,list(data$country,data$year,as.data.frame(data[,var_i])[,1]), drop = TRUE)

  for(i in 1:length(list)){
    print(i)
    print(names(list)[i])
    list[[i]] <- list[[i]][,"trust_euparliament"] # Only keep trust variable in list elements
    list[[i]] <- list[[i]] %>% na.omit() # remove missings from trust in list elements
    }
  loop.is <- sub('\\.([^\\.]*)$', '', names(list))
  list.OV <- NULL
  for(z in loop.is){
    tmp <- list(trust.group0 = as.data.frame(list[[paste(z, ".0", sep = "")]])[,1],# list[[paste(z, ".0", sep = "")]] %>% pull(),
                  trust.group1 = as.data.frame(list[[paste(z, ".1", sep = "")]])[,1]) # # list[[paste(z, ".1", sep = "")]] %>% pull()

    
    list.OV[[z]] <- overlap(tmp, nbins = 10, plot = FALSE, partial.plot = FALSE)$OV
  }
  
  results <- data.frame(country = str_extract(names(list.OV), "[^\\.]+"),
                    year = str_extract(names(list.OV), "[0-9][0-9][0-9][0-9]"),
                         varname = unlist(list.OV))
  names(results) <- c("country", "year", paste("overlap_", var_i, sep=""))

  
  assign(paste("results_", var_i, sep=""), results)
  rm(results)
}


   
   
# Merge datasets
 OV <- full_join(results_age_cat, results_hincome_cat, by = c("country", "year"))
 OV <- full_join(OV, results_paid_work_cat, by = c("country", "year"))
 #OV <- full_join(OV, results_education_cat, by = c("country", "year"))
  OV <- full_join(OV, results_age_cat_young_middle, by = c("country", "year")) 
 OV <- full_join(OV, results_age_cat_middle_old, by = c("country", "year")) 
 #OV <- full_join(OV, results_education_cat_low_middle, by = c("country", "year")) 
 #OV <- full_join(OV, results_education_cat_middle_high, by = c("country", "year")) 
 OV <- full_join(OV, results_education_eisced, by = c("country", "year")) 
 OV <- full_join(OV, results_ideology_cat, by = c("country", "year"))
 
 # Merge with aggregated data
 OV$year <- as.numeric(as.character(OV$year))
 data.agg <- left_join(data.agg, OV, by = c("country", "year"))
 
 #write.csv(OV, "data_ov.csv", row.names = FALSE)




# Data Mean difference ####


# Below we calculate the mean differences for trust_euparliament for each of our contrast variables that divide the sample into two groups each (always names *_cat)


variables <- c("age_cat", 
               "hincome_cat", 
               "paid_work_cat",
               "education_eisced",
               "age_cat_young_middle",
               "age_cat_middle_old",
               "ideology_cat")

for(var_i in variables){

  list <- split(data,list(data$country,data$year,data[,var_i] %>% pull()), drop = TRUE)

  for(i in 1:length(list)){
    print(i)
    print(names(list)[i])
    list[[i]] <- list[[i]][,"trust_euparliament"] # Only keep trust variable in list elements
    list[[i]] <- list[[i]] %>% na.omit() # remove missings from trust in list elements
    }
  loop.is <- sub('\\.([^\\.]*)$', '', names(list))
  list.OV <- NULL
  for(z in loop.is){
    tmp <- list(trust.group0 = list[[paste(z, ".0", sep = "")]] %>% pull(),
                  trust.group1 = list[[paste(z, ".1", sep = "")]] %>% pull())

    # mean difference
    
    
    list.OV[[z]] <- abs(mean(tmp$trust.group1)-mean(tmp$trust.group0))
  }
  
  
  results <- data.frame(country = str_extract(names(list.OV), "[^\\.]+"),
                    year = str_extract(names(list.OV), "[0-9][0-9][0-9][0-9]"),
                         varname = unlist(list.OV))
  names(results) <- c("country", "year", paste("meandiff_", var_i, sep=""))

  
  assign(paste("results_", var_i, sep=""), results)
  rm(results)
   
}

# Merge datasets
 OV <- full_join(results_age_cat, results_hincome_cat, by = c("country", "year"))
 OV <- full_join(OV, results_paid_work_cat, by = c("country", "year"))
  OV <- full_join(OV, results_age_cat_young_middle, by = c("country", "year")) 
 OV <- full_join(OV, results_age_cat_middle_old, by = c("country", "year")) 
 OV <- full_join(OV, results_education_eisced, by = c("country", "year")) 
 OV <- full_join(OV, results_ideology_cat, by = c("country", "year")) 

#write.csv(OV, "data_mean_difference.csv", row.names = FALSE)

  # Merge with aggregated data
  OV$year <- as.numeric(as.character(OV$year))
  data.agg <- left_join(data.agg, OV, by = c("country", "year"))





# Rename variables

  names(data.agg) <- gsub("_cat", "", names(data.agg))






# Figure A3 ####
# Create long-format dataset for display with ggplot

  data.agg_long <- data.agg %>% 
                      select(year, country,
                         overlap_age,
                         overlap_hincome,
                         overlap_paid_work,
                         overlap_education_eisced,
                         overlap_ideology)%>% 
    gather(key = "variable", 
                         value = "value", 
                         overlap_age,
                         overlap_hincome,
                         overlap_paid_work,
                         overlap_education_eisced,
                         overlap_ideology)%>% 
  filter(variable!="") %>% 
  filter(!is.na(value)) %>% as.data.frame() %>% mutate(year = as.numeric(year))


library(ggplot2)
data.agg_long$variable <- paste(data.agg_long$variable, " contrast", sep="")
data.agg_long$variable <- gsub("overlap_|_eisced", "", data.agg_long$variable)
data.agg_long$variable <- gsub("paid_work", "work", data.agg_long$variable)
data.agg_long$variable <- gsub("hincome", "hhincome", data.agg_long$variable)

data.agg_long$variable <- gsub("hhincome", "income", data.agg_long$variable)
data.agg_long$variable <- gsub("work", "work status", data.agg_long$variable)

data.agg_long$variable <- Hmisc::capitalize(data.agg_long$variable)
labels <- unique(data.agg_long$variable)

ggplot(data.agg_long, aes(x=year, y=value, color = variable)) + 

        geom_point() +
       geom_line() +
facet_wrap(~country) + 
        scale_x_continuous(breaks = seq(2002,2020,6),
                            labels = gen_newlables,
                           name ="Year") + 
        scale_y_continuous(name ="Between-group polarization of trust in the EU parliament") +
  theme_light() +
        theme(axis.text.x = element_text(size=8, angle=45, hjust=1,vjust=1),
              legend.position="top") + scale_colour_discrete(name  ="Groups")

ggsave("Figure A3-overlap.png", width = 7, height = 7, dpi = 800)



# Table 2 ####
# Model time trend OV ####

data.agg <- data.agg %>% mutate(country = as.factor(country))

library(stargazer)

fit5 <- lm(overlap_age ~ year_rescaled + country, data = data.agg)
fit6 <- lm(overlap_age ~ year_rescaled + periphery + year_rescaled * periphery + country, data = data.agg)

fit7 <- lm(overlap_hincome ~ year_rescaled + country, data = data.agg)
fit8 <- lm(overlap_hincome ~ year_rescaled + periphery + year_rescaled * periphery + country, data = data.agg)

fit9 <- lm(overlap_paid_work ~ year_rescaled + country, data = data.agg)
fit10 <- lm(overlap_paid_work ~ year_rescaled + periphery + year_rescaled * periphery + country, data = data.agg)


fit11 <- lm(overlap_education_eisced ~ year_rescaled + country, data = data.agg)
fit12 <- lm(overlap_education_eisced ~ year_rescaled + periphery + year_rescaled * periphery + country, data = data.agg)

fit13 <- lm(overlap_ideology ~ year_rescaled + country, data = data.agg)
fit14 <- lm(overlap_ideology ~ year_rescaled + periphery + year_rescaled * periphery + country, data = data.agg)




stargazer(fit5, fit6, fit7, fit8, fit9, fit10, fit11, fit12, fit13, fit14,
					type="html", 
					title = "Over-time changes in between-group polarization (overlap)",
					omit.stat=c("LL","ser","f","adj.rsq"), 
					omit = c("country"),
					dep.var.caption = "Outcome: Overlap coefficient",
					dep.var.labels = c("Age groups", "Income groups", "Work groups", "Educ. groups", "Ideo. groups"),
					covariate.labels = c("Year (0-18)", "Periphery (0,1)", "Year x Periphery"),
					ci=FALSE, digits=2, 
					ci.level=0.95,
					single.row=FALSE, 
					label = "tab:OV-trends", 
					#table.placement="H", 
					column.sep.width = "-25pt",
					align = TRUE,
					column.labels = c("M5", "M6", "M7", "M8", 
														"M9", "M10", "M11", "M12", "M13", "M14"),
					model.names = FALSE,
					model.numbers = FALSE,
					star.cutoffs = c(0.05, 0.01, 0.001),
					notes = c("Data: European Social Survey, 2002-2020. The models include country fixed-effects.", "Time variable rescaled so that year '2002' corresponds to value '0'."),
					header=FALSE,
					no.space=TRUE,
					font.size = "footnotesize",
					out = "Table 2.html"
)





# Figure A1: Wording of questions and recoding ####




data$eisced_labelled_fac <- factor(data$eisced_labelled)
levels(data$eisced_labelled_fac) <- c("ES-ISCED I", "ES-ISCED II", "ES-ISCED IIIb", "ES-ISCED IIIa", "ES-ISCED IV", "ES-ISCED V1", "ES-ISCED V2")


# Education 
  p1 <- ggplot(data %>% filter(eisced_labelled_fac!="NA"), aes(eisced_labelled_fac)) + 
    geom_bar(fill = "gray") + labs(x="Education", y = "N") +
       theme_classic(base_size = 18)   +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Living on income
  p2 <- ggplot(data %>% drop_na(hincome_cat_4), aes(hincome_cat_4)) + 
    geom_bar(fill = "gray") + labs(x="Living on income", y = "N") +
       theme_classic(base_size = 18)   +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Unemployment/activity
  p3 <- ggplot(data %>% drop_na(mainact), aes(fct_infreq(mainact))) + geom_bar(fill = "gray") + labs(x="Work status", y = "N") + 
         theme_classic(base_size = 18)   +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
# Age
  p4 <- ggplot(data, aes(age)) + 
    geom_histogram(binwidth=5, fill = "gray") + labs(x="Age", y = "N")+ 
         theme_classic(base_size = 18)

  # Age
  p5 <- ggplot(data %>% drop_na(leftright), 
  						 aes(as.factor(leftright))) + 
  	 geom_bar(fill = "gray") + 
  	labs(x="Ideology (left-right scale)", y = "N")+ 
  	theme_classic(base_size = 18)

  plot_out <- p1 + p2 + p3 + p4 + p5 +
  	plot_layout(ncol = 2, nrow = 3)
  	
  ggsave("Figure A1.png", plot_out)


# Table A2 Summary statistics ####
# data_for_summary <- data.agg %>% ungroup %>% select(-N_rows, -country, -contains("n_trust")) %>% data.frame()
# names(data_for_summary) <- Hmisc::capitalize(gsub("_", " ", names(data_for_summary)))
# 
# 
# 
# modelsummary::datasummary_skim(
#   data_for_summary, 
#   output = "Table A2.html",
#   title = 'Summary statistics (2013)'
#   ) %>%
#   kableExtra::kable_styling(
#     font_size = 8,
#     full_width = F,
#     latex_options = c(
#       "HOLD_position"
#     )
#   )


# Table A2 html ####



# SUMMARY STATISTICS ####
  # for(i in seq(2002, 2016, by = 2)){
  #   stargazer(data %>% filter(year == i) %>% data.frame(),
  #           type="latex", label = paste(str_replace(i, "_", " "), " summary table", sep = ""), font.size="scriptsize", #table.placement="!ht",
  #           column.sep.width = ".2pt" , title = paste("ESS ", str_replace(i, "_", " "), " summary table", sep = ""),
  #           digits = 2, rownames = FALSE,
  #           header=FALSE,
  #           omit.summary.stat = c("p25", "p75"),
  #           table.placement = "!h")
  # }



data_for_summary <- data.agg %>% ungroup %>% select(-N_rows, -country, -contains("n_trust")) %>% data.frame()
names(data_for_summary) <- Hmisc::capitalize(gsub("_", " ", names(data_for_summary)))

    stargazer(data_for_summary, 
            type="latex", font.size="scriptsize", #table.placement="!ht", 
            column.sep.width = ".2pt" , title = "Summary of country level data", 
            digits = 2, rownames = FALSE,
            header=FALSE,
            omit.summary.stat = c("p25", "p75"),
            table.placement = "!h",
          out = "Table A2.html")





# Alternative specifications: Overall polarization ####

# Table A3 ####
library(stargazer)

data.agg$year <- as.numeric(data.agg$year)
data.agg$year_rescaled <- data.agg$year-2002
# Rescale year
fit1 <- lm(trust_euparliament_mean_w_sdm ~ year_rescaled + as.factor(country), data = data.agg)
fit2 <- lm(trust_euparliament_mean_w_sdm ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_var_w_sdm ~ year_rescaled + as.factor(country), data = data.agg)
fit4 <- lm(trust_euparliament_var_w_sdm ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)




stargazer(fit1, fit2, fit3, fit4,
          #fit5, fit6, fit7, fit8,
          type="latex", 
          title = "Over-time changes in trust in the EP (weighted mean and variance)",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Trust in the EP",
          dep.var.labels = c("Mean", "Variance", "Nat. parl.: Mean", "Nat. parl.: Var."),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:model-time-trend-weighted-sdm", 
          covariate.labels = c("Year (0-18)", "Periphery (0,1)", "Year x Periphery"),
          #table.placement="H", 
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M15", "M16", "M17", "M18"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c("Data: European Social Survey, 2002-2020.", "The models include country fixed-effects.", "Time variable rescaled so that year '2002' corresponds to value '0'."),
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A3.html"
          )



# Table A4 ####

# Rescale year
fit1 <- lm(trust_euparliament_mean ~ time_2008_dummy + as.factor(country), data = data.agg)
fit2 <- lm(trust_euparliament_mean ~ time_2008_dummy + periphery + time_2008_dummy * periphery + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_var ~ time_2008_dummy + as.factor(country), data = data.agg)
fit4 <- lm(trust_euparliament_var ~ time_2008_dummy + periphery + time_2008_dummy * periphery + as.factor(country), data = data.agg)




stargazer(fit1, fit2, fit3, fit4,
          #fit5, fit6, fit7, fit8,
          type="latex", 
          title = "Over-time changes in trust in the EP (different time specification)",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Trust in the EP",
          dep.var.labels = c("Mean", "Variance", "Nat. parl.: Mean", "Nat. parl.: Var."),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:model-time-trend-weighted-sdm-time-2008-dummy", 
          covariate.labels = c("Post-crisis (after 2008)", "Periphery (0,1)", "Post-crisis X Periphery"),
          #table.placement="H", 
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M19", "M20", "M21", "M22"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c("Data: European Social Survey, 2002-2020.", "The models include country fixed-effects."),
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A4.html"
          )



# Table A5 ####

# Table \@ref(tab:trust-trends-control2) and \@ref(tab:trust-trends-control3) reestimate the models in Table \@ref(tab:trust-trends) adding yearly measures of unemployment and gdp growth as control variables.





library(stargazer)
data.agg$year_rescaled <- as.numeric(data.agg$year_rescaled)
fit1 <- lm(trust_euparliament_mean ~ year_rescaled + periphery + gdp_growth + unemployment + as.factor(country), data = data.agg)

fit2 <- lm(trust_euparliament_mean ~  year_rescaled + periphery + gdp_growth + unemployment + trust_parliament_mean + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_mean ~ year_rescaled + periphery + gdp_growth + unemployment + year_rescaled * periphery + as.factor(country), data = data.agg)

fit4 <- lm(trust_euparliament_mean ~  year_rescaled + periphery + gdp_growth + unemployment + trust_parliament_mean + year_rescaled * periphery + as.factor(country), data = data.agg)




# fit3 <- lm(trust_euparliament_var ~ year_rescaled + trust_parliament_var + as.factor(country), data = data.agg)
# fit4 <- lm(trust_euparliament_var ~ year_rescaled + periphery + year_rescaled * periphery + unemployment + gdp_growth + as.factor(country), data = data.agg)
# 


stargazer(fit1, fit2, fit3, fit4,
          type="latex", 
          title = "Time trend: Controlling for unemployment and gdp growth",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Trust EU parliament (Mean)",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Year (0-18)", "Periphery (0,1)", "Gdp Growth", "Unemployment",  "Trust. nat. parl. (mean)", "Year x Periphery"),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:trust-trends-control2", 
          #table.placement="H", 
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M23", "M24", "M25", "M26"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c("Data: European Social Survey; Country fixed-effects, i.e., ","models include country factor variable not shown here."),
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A5.html"
          )





# Table A6 ####


library(stargazer)
data.agg$year_rescaled <- as.numeric(data.agg$year_rescaled)
fit1 <- lm(trust_euparliament_var ~ year_rescaled + periphery + gdp_growth + unemployment + as.factor(country), data = data.agg)

fit2 <- lm(trust_euparliament_var ~  year_rescaled + periphery + gdp_growth + unemployment + trust_parliament_var + as.factor(country), data = data.agg)

fit3 <- lm(trust_euparliament_var ~ year_rescaled + periphery + gdp_growth + unemployment + year_rescaled * periphery + as.factor(country), data = data.agg)

fit4 <- lm(trust_euparliament_var ~  year_rescaled + periphery + gdp_growth + unemployment + trust_parliament_var + year_rescaled * periphery + as.factor(country), data = data.agg)



stargazer(fit1, fit2, fit3, fit4,
          type="latex", 
          title = "Time trend: Controlling for unemployment and gdp growth",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Trust EU parliament (Var.)",
          dep.var.labels.include = FALSE,
covariate.labels = c("Year (0-18)", "Periphery (0,1)", "Gdp Growth", "Unemployment", "Trust. nat. parl. (var.)", "Year x Periphery"),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:trust-trends-control3", 
          #table.placement="H", 
          column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M27", "M28", "M29", "M30"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c("Data: European Social Survey; Country fixed-effects, i.e., ","models include country factor variable not shown here."),
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A6.html"
          )


# Figure A2 ####
# Below we depict trends of mean and variance for national-level parliaments alongside the same trends for trust in the EP.



# Generate a graph that visualized mean and polarization over time

# Renaming for ggplot2 longformat: Do this for 4 variables
  data.agg1 <- data.agg %>% select(year, country, trust_euparliament_mean, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_euparliament_mean") %>% 
                            dplyr::rename(value = trust_euparliament_mean) %>% 
                            mutate(value = as.numeric(value))
  
  data.agg2 <- data.agg %>% select(year, country, trust_euparliament_var, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_euparliament_var") %>% 
                            dplyr::rename(value = trust_euparliament_var) %>% 
                            mutate(value = as.numeric(value))

  data.agg3 <- data.agg %>% select(year, country, trust_parliament_mean, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_parliament_mean") %>% 
                            dplyr::rename(value = trust_parliament_mean) %>% 
                            mutate(value = as.numeric(value))
  
  data.agg4 <- data.agg %>% select(year, country, trust_parliament_var, 
                                   periphery, time_2006_dummy) %>% 
                            mutate(variable = "trust_parliament_var") %>% 
                            dplyr::rename(value = trust_parliament_var) %>% 
                            mutate(value = as.numeric(value))

# Row bind datasets
  data.agg.long <- bind_rows(data.agg1, 
                             data.agg2,
                             data.agg3,
                             data.agg4) %>%
    mutate(institution = gsub("trust_|_mean|_var", "", variable)) %>%
    mutate(variable = gsub("euparliament_|parliament_", "", variable))


# Filter out countries (with a single observation over time)
  data.agg.long <- data.agg.long %>% filter(variable!="")

  # data.agg.long <- data.agg.long %>% filter(!is.na(value)) # Filter out potential missings
  data.agg.long <- as.data.frame(data.agg.long) # Convert to dataframe class
  data.agg.long$year <- as.numeric(data.agg.long$year) # Convert year variable to numeric
  data.agg.long$variable <- as.factor(data.agg.long$variable) 
  data.agg.long$country <- factor(data.agg.long$country,
  																	 levels = sort(levels(data.agg.long$country))) 

# Generate plot
  ggplot(data.agg.long, aes(x=year, y=value, shape = variable, color = institution)) + 
         geom_point(aes(shape=variable), alpha = 0.7) +
         geom_line(alpha = 0.7) +
         facet_wrap(~country) + 
         scale_x_continuous(breaks = seq(2002,2020,6),
                            labels = gen_newlables,
                            name ="Year") + 
         scale_y_continuous(name ="Mean and variance of trust") + 
         scale_colour_manual(name  ="Institution",
                               labels=c("European Parliament", "Nat. Parliament"),
                               values=c("#f1a340", "lightgray")) + # "#998ec3"
         scale_shape_manual(name  ="Measure",
                              values = c(16,17),
                               labels=c("Mean", "Variance (polariz.)")) + 
         theme_light() +
         theme(axis.text.x = element_text(size=8, angle=45, hjust=1,vjust=1),
               legend.position="top")
  
  

ggsave("Figure A2-national-parliaments.png", width = 7, height = 7, dpi = 600)
  





# Additional analysis (not in paper) ####
# Alternative specifications: Between-group polarization ####

# Figure A4 ####
# Create long-format dataset for display with ggplot

  data.agg_long <- data.agg %>% 
                      select(year, country,
                         meandiff_age,
                         meandiff_hincome,
                         meandiff_paid_work,
                         meandiff_education_eisced)%>% gather(key = "variable", 
                         value = "value", 
                         meandiff_age,
                         meandiff_hincome,
                         meandiff_paid_work,
                         meandiff_education_eisced)%>% 
  filter(variable!="") %>% 
  filter(!is.na(value)) %>% as.data.frame() %>% mutate(year = as.numeric(year))

data.agg_long$variable <- paste(data.agg_long$variable, " contrast", sep="")
data.agg_long$variable <- gsub("meandiff_|_eisced", "", data.agg_long$variable)
data.agg_long$variable <- gsub("paid_work", "work", data.agg_long$variable)
data.agg_long$variable <- gsub("hincome", "hhincome", data.agg_long$variable)

data.agg_long$variable <- gsub("hhincome", "income", data.agg_long$variable)
data.agg_long$variable <- gsub("work", "work status", data.agg_long$variable)

data.agg_long$variable <- Hmisc::capitalize(data.agg_long$variable)
labels <- unique(data.agg_long$variable)



ggplot(data.agg_long, aes(x=year, y=value, group = variable, color = variable)) + 

        geom_point() +
       geom_line() +
facet_wrap(~country) + 
        scale_x_continuous(breaks = seq(2002,2020,6),
                            labels = gen_newlables,
                           name ="Year") + 
        scale_y_continuous(name ="Mean difference: Between-group polarization of trust in the EU parliament") +
    theme_light() +
        theme(axis.text.x = element_text(size=8, angle=45, hjust=1,vjust=1),
              legend.position="top") + scale_colour_discrete(name  ="Groups")


ggsave("Figure A4 - meandiff.png", width = 7, height = 7, dpi = 600)




# Table A8 ####
# Model mean diff

library(stargazer)


fit1 <- lm(meandiff_age ~ year_rescaled + as.factor(country), data = data.agg)
fit1.2 <- lm(meandiff_age ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)

fit2 <- lm(meandiff_hincome ~ year_rescaled + as.factor(country), data = data.agg)
fit2.2 <- lm(meandiff_hincome ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)

fit3 <- lm(meandiff_paid_work ~ year_rescaled + as.factor(country), data = data.agg)
fit3.2 <- lm(meandiff_paid_work ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)


fit4 <- lm(meandiff_education_eisced ~ year_rescaled + as.factor(country), data = data.agg)
fit4.2 <- lm(meandiff_education_eisced ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)



stargazer(fit1, fit1.2, fit2, fit2.2, 
          fit3, fit3.2, fit4, fit4.2,
          type="latex", 
          title = "Between-group polarization (mean-difference): Time trend",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Mean difference",
          dep.var.labels = c("Age groups", "Income groups", "Work groups", "Educ. groups"),
          covariate.labels = c("Year (0-18)", "Periphery (0,1)",  "Year x Periphery"),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          label = "tab:OV2-trends", 
          #table.placement="H", 
          column.sep.width = "-15pt",
          align = TRUE,
          column.labels = c("M31", "M32", "M33", "M34",
                            "M35", "M36", "M37", "M38"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = "* Data: European Social Survey, 2002-2018; Country fixed-effects ",
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A8.html"
          )


# Table A9 ####
 #Other age group specifications



library(stargazer)

fit1 <- lm(overlap_age_young_middle ~ year_rescaled + as.factor(country), data = data.agg)
fit1.2 <- lm(overlap_age_young_middle ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)


fit2 <- lm(overlap_age_middle_old ~ year_rescaled + as.factor(country), data = data.agg)
fit2.2 <- lm(overlap_age_middle_old ~ year_rescaled + periphery + year_rescaled * periphery + as.factor(country), data = data.agg)





stargazer(fit1, fit1.2, fit2, fit2.2,
          type="latex", 
          title = "Between-group polarization trend: Other age group specifications",
          omit.stat=c("LL","ser","f","adj.rsq"), 
          omit = c("country"),
          dep.var.caption = "Outcome: Overlap coefficient",
          dep.var.labels = c("Age groups (young/middle)","Age groups (middle/old)"),
          ci=FALSE, digits=2, 
          ci.level=0.95,
          single.row=FALSE, 
          covariate.labels = c("Year (0-18)", "Periphery (0,1)",  "Year x Periphery"),
          label = "tab:OV-trends-age", 
          #table.placement="H", 
          # column.sep.width = "-7pt",
          align = TRUE,
          column.labels = c("M39", "M40", "M41", "M42"),
          model.names = FALSE,
          model.numbers = FALSE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = "Data: European Social Survey; Country fixed-effects ",
          header=FALSE,
          no.space=TRUE,
          font.size = "footnotesize",
          out = "Table A9.html"
          )



